library(readODS) #read ods files
library(readxl) #reads excel files
library(tidyverse) #Tidy packages
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2) #pretty plots
library(ggthemes) #themes for ggplot
library(RColorBrewer)
library(scales) #axis formatting options
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(plotly) #interactive web graphs
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(networkD3) #interactive web network graphs
Introduction to the inventory, with links kilotonnes of CO2 equivalents
# Map loop to download UK 2022 data from the UK Department for Energy Security and Net Zero
downloaded <- file.exists("UKGHG_2022.ods") #checks if file is downloaded in working directory
if(downloaded != T){ #if downloaded is not true
map2("https://assets.publishing.service.gov.uk/media/65c0d54663a23d000dc821ca/final-greenhouse-gas-emissions-2022-by-source-dataset.ods", #update this link when new data available
"UKGHG_2022.ods", download.file)} else{print('data downloaded')} #name and download or print
## [1] "data downloaded"
#Read in UK 2022 data from the UK Department for Energy Security and Net Zero
GHG_UK22 <- read_ods(
path = "UKGHG_2022.ods",
sheet = 1, #define tab/sheet to read
col_names = TRUE, #use header row for column names
col_types = NULL, #guess data types
na = "", #treat blank cells as NA
skip = 0, #don't skip rows
formula_as_formula = FALSE, #values only
range = NULL,
row_names = FALSE, #no row names
strings_as_factors = TRUE) %>% #use factors
rename("Subsector" = "TES Subsector") #rename variable
#Create dataframe with only categorization
GHG_categories <- GHG_UK22[, c(3,6:12)] %>%
distinct()
#IPCC code and TES subsectors - for categorization
GHG_IPCC_TES <- GHG_UK22[, c(3,6,7)] %>%
distinct() %>%
rename("IPCC_code" = "IPCC Code") #rename variable to match other df
# Map loop to download UK Regional data 1990-2021 from the UK National Atmospheric Emissions Inventory
downloaded2 <- file.exists("GHG_regions.xlsx") #checks if file is downloaded in working directory
if(downloaded2 != T){ #if downloaded is not true
map2("https://uk-air.defra.gov.uk/reports/cat09/2306200930_DA_GHGI_1990-2021_Final_v3.1.xlsx", #update this link when new data available
"GHG_regions.xlsx", download.file)} else{print('data downloaded')} #name and download or print
## [1] "data downloaded"
#Read in excel file, by source, with region
GHG_regions <- read_xlsx(
"GHG_regions.xlsx", #excel file from current working directory
sheet = "BySource_data", #define tab/sheet to read
col_names = TRUE, #use header row for column names
col_types = NULL, #guess data types
na = "", #treat blank cells as NA
trim_ws = TRUE, #trim any whitespace/unused columns and rows
skip = 6, #skip 6 rows to get to pivot table data
n_max = Inf, #set maximum number of rows to include
guess_max = min(1000, Inf), #how many rows to use to guess data types
progress = readxl_progress(), #display progress in reading in data
.name_repair = "unique" #makes sure all column names not empty or duplicated
)
#Add subsector into GHG_regions, match format
GHG_regions2 <- inner_join(GHG_IPCC_TES, GHG_regions,
by='IPCC_code') %>% #use inner join to combine dataframes by a shared variable
rename("GHG" = "Pollutant") %>% #rename variable to match UK only csv
rename("NC_Sector" = "NCFormat") %>% #rename variable
rename("TES_Sector" = "TES Sector") #rename variable
#Simplify regional dataset
GHG_simp <- GHG_regions2 %>%
group_by(EmissionYear, TES_Sector, Subsector, RegionName, GHG) %>%
summarize(Temissions = sum(Emission, na.rm = TRUE)) %>%
rename("Sector" = "TES_Sector")
## `summarise()` has grouped output by 'EmissionYear', 'TES_Sector', 'Subsector',
## 'RegionName'. You can override using the `.groups` argument.
#2021 only
GHG_2021 <- GHG_simp %>%
filter(EmissionYear == "2021") #filter to only 2021
#Grouped into all of UK
GHG_2021_joined <- GHG_2021 %>%
group_by(Sector, Subsector, GHG) %>%
summarize(Temissions = sum(Temissions, na.rm = TRUE))
## `summarise()` has grouped output by 'Sector', 'Subsector'. You can override
## using the `.groups` argument.
#Define a custom palette
#Defra_Palette <- c("#E69F00", "#56B4E9", "#009E73", "#CC79A7")
#Set up Sankey links & nodes dataframes
links <- data.frame(source=c(paste0(GHG_2021_joined$Sector), paste0(GHG_2021_joined$Subsector)),
target=c(paste0(GHG_2021_joined$Subsector), paste0(GHG_2021_joined$GHG)),
value=(paste0(GHG_2021_joined$Temissions)))
links <- links[-c(83:85),]
nodes <- data.frame(
name=c(as.character(links$source),
as.character(links$target)) %>% unique())
#With networkD3, connection must be provided using id - So we need to reformat it.
links$IDsource <- match(links$source, nodes$name)-1
links$IDtarget <- match(links$target, nodes$name)-1
#Plot with networkD3
sankey <- sankeyNetwork(Links = links, Nodes = nodes,
Source = "IDsource", Target = "IDtarget",
Value = "value", NodeID = "name",
units = "kilotonnes CO2 eq.",
sinksRight=FALSE, nodePadding = 8,
iterations = 5)
sankey
library(rjson)
json_file <- "https://raw.githubusercontent.com/plotly/plotly.js/master/test/image/mocks/sankey_energy.json"
json_data <- fromJSON(paste(readLines(json_file), collapse=""))
fig <- plot_ly(
type = "sankey",
domain = list(
x = c(0,1),
y = c(0,1)
),
orientation = "h",
valueformat = ".0f",
valuesuffix = "TWh",
node = list(
label = json_data$data[[1]]$node$label,
color = json_data$data[[1]]$node$color,
pad = 15,
thickness = 15,
line = list(
color = "black",
width = 0.5
)
),
link = list(
source = json_data$data[[1]]$link$source,
target = json_data$data[[1]]$link$target,
value = json_data$data[[1]]$link$value,
label = json_data$data[[1]]$link$label
)
)
fig <- fig %>% layout(
title = "Energy forecast for 2050<br>Source: Department of Energy & Climate Change, Tom Counsell via <a href='https://bost.ocks.org/mike/sankey/'>Mike Bostock</a>",
font = list(
size = 10
),
xaxis = list(showgrid = F, zeroline = F),
yaxis = list(showgrid = F, zeroline = F)
)
fig
#Make into lists, add label list
linklist <- as.list(links)
linklist$value <- as.numeric(linklist$value)
linklist$label <- c("","","","","","","","","","","","","","","","","","","","",
"","","","","","","","","","","","","","","","","","","","",
"","","","","","","","","","","","","","","","","","","","",
"","","","","","","","","","","","","","","","","","","","",
"","","","","","","","","","","","","","","","","","","","",
"","","","","","","","","","","","","","","","","","","","",
"","","","","","","","","","","","","","","","","","","","",
"","","","","","","","","","","","","","","","","","","","",
"","","","","","","")
links_p <- plot_ly(links, type = list)
nodecolors <- nodes %>%
mutate(color = c("#FFD700","#EA5F94","#9D02D7","#FA8775","#FFB14E","#CD34B5","#0000FF",
"#C9A600","#C7A600","#917800","#BEA767",
"#E5A0BE","#FF7ECB","#B42A66","#A7003B",
"#A400EC","#6B00B9","#631F90","#C860EA","#310074",
"#C35849",
"#C8811C","#FFAC1E","#BC8644","#9C4F00","#E1BA91",
"#774BE5","#2C15B2","#000097","#6836FF","#0000E3","#0000C7","#000096",
"#644540",
"#938E8B","#BFBFBF","#3B3734","#7A7979","#383838","#898887","#D3D1D0"))
nodelist <- as.list(nodecolors)
nodes_p <- plot_ly(nodecolors, type=list)
#Plot with Plotly
sankey2 <- plot_ly(type = "sankey",
orientation = "h",
valueformat = ".0f",
valuesuffix = "kt CO2 eq.",
node = list(
label = nodelist$name,
color = nodelist$color,
pad = 15,
thickness = 20,
line = list(color = "black", width = 0.5),
link = list(
source = linklist$IDsource,
target = linklist$IDtarget,
value = linklist$value,
label = linklist$label)))
sankey2 <- sankey2 %>% layout(
title = "Greenhouse Gas Emissions Per Sector in the UK",
font = list(size = 10),
xaxis = list(showgrid = F, zeroline = F),
yaxis = list(showgrid = F, zeroline = F))
sankey2
#Plot time series
Sect_time <- GHG_regions2 %>%
group_by(EmissionYear, TES_Sector) %>%
summarize(Temissions = sum(Emission, na.rm = TRUE)) %>%
rename("Sector" = "TES_Sector") %>%
ggplot(aes(x=as.numeric(EmissionYear), y=Temissions, color=Sector)) +
geom_line() +
xlab("") + ylab("Greenhouse Gas Emissions (kilotonnes CO2 eq.)") +
ggtitle("Greenhouse Gas Emissions of the UK per Sector Over Time") +
theme_few() +
scale_y_continuous(labels = comma) +
scale_x_continuous(breaks = pretty(c(1990:2020), n=6)) +
scale_color_brewer(palette = "Set2") +
theme(legend.position="bottom")
## `summarise()` has grouped output by 'EmissionYear'. You can override using the
## `.groups` argument.
Sect_time #view ggplot
#Make interactive with Plotly
t_int <- ggplotly(Sect_time) %>% #convert to interactive graph
layout(legend = list(
orientation = "h"))
#Plotly customization
style(t_int, hovertemplate="Year: %{x:,.r}
CO2eq: %{y:,.4r} kt") #define what hover label says, round x values, round y values to 4 sig. figs